

// Start from an empty session before any data are loaded.
clear all
// Let output scroll without pausing at --more-- prompts.
set more off

//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////         Part 0. Variables      ////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//------------------------------------------------------------------------------

// Load the cleaned bilateral data set.
use "_DataClean.dta", clear

///////////////////////////////////// index ////////////////////////////////////

// Numeric codes for the origin (o) and destination (d) identifiers.
foreach side in o d {
	encode iso_`side', generate(code_`side')
}
// N: the largest origin code, stored as a constant on every observation.
egen N = max(code_o)
// D: 1 when origin and destination coincide (domestic pair), 0 otherwise.
generate D = (iso_o == iso_d)

/////////////////////////////// Data Generation /////////////////////////////////

// aggregate (manufacturing)
// flow_0 is the row sum of every variable matched by flow*; it plays the role
// of "industry 0" in all loops that run over 0/25.
// NOTE(review): assumes flow* matches only the sectoral flow variables and that
// flow_0 does not exist yet in _DataClean.dta -- confirm.
egen flow_0=rowtotal(flow*)

// construct dependent variables: shares
// For each industry i the loop builds temporary variables prefixed with "v";
// the final rename strips the "v" and appends _i, which yields:
//   lnflow_i     log of the flow (missing when the flow is zero)
//   trade_i      1 if flow>0 or the pair is domestic (D==1), otherwise 0
//   expend_i     sum of flow within destination (iso_d)
//   world_i      sum of expend over the observations of each origin (iso_o)
//   income_i     sum of flow within origin (iso_o)
//   expshare_i   expend/world          incshare_i   income/world
//   y_i          flow/expend           ya_i         y/EM
//   incsharea_i  incshare/EM
// NOTE(review): (flow>0) is also true when flow is missing, so a missing
// sectoral flow would be coded trade==1 -- confirm the input has no missing flows.
// NOTE(review): "rename v* ..." renames EVERY variable whose name starts with
// "v", so the input data must not contain such variables -- confirm.
forvalues i = 0/25 {
	gen flow=flow_`i'
	gen vlnflow=log(flow)	
	gen vtrade=(flow>0)
	replace vtrade=1 if D==1
	bysort iso_d: egen vexpend=sum(flow)
	bysort iso_o: egen vworld=sum(vexpend)	
	bysort iso_o: egen vincome=sum(flow)
	gen vexpshare=vexpend/vworld
	gen vincshare=vincome/vworld
	gen vy=flow/vexpend
	gen vya=vy/EM
	gen vincsharea=vincshare/EM
	// v<name> -> <name>_i
	rename v* *_`i'
	// flow is only a scratch copy of flow_i
	drop flow
}
// construct independent variables: trade costs
generate lndist = ln(distw)                                  // log distance
generate fC = (im_cost*gdpcap_d + ex_cost*gdpcap_o)/200      // fixed cost
generate w = lngdpcap_o                                      // exporter income

// Short names used in the regressions. Distance is set to zero for
// domestic pairs (D==1).
generate t = cond(D == 1, 0, lndist)
generate tw = t*w

// Log tariff factor for every industry; the aggregate one (index 0) is kept
// under the short name r instead of r_0.
forvalues k = 0/25 {
	generate r_`k' = ln(1 + tari_`k')
}
generate r = r_0
drop r_0
generate rw = r*w

// Fixed cost relative to destination GDP, again zero for domestic pairs.
generate f = cond(D == 1, 0, fC/gdp_d)
generate fw = f*w

// construct independent variables: real income
// rho is a fixed constant; the original comment attributes the value to "Novy".
gen rho=0.177 // Novy
// lnStone: within each destination, the sum over its rows of
// y_0*(rho*lndist+log(price_FR)), i.e. a y_0-weighted sum of that cost term.
bysort iso_d: egen lnStone=sum(y_0*(rho*lndist+log(price_FR)))
// lnr: destination log GDP per capita minus lnStone.
gen lnr=lngdpcap_d-lnStone 
// lnpcincome: origin log GDP per capita (same content as w).
gen lnpcincome=lngdpcap_o 
// z: interaction of origin income with lnr; it is the regressor z used in the
// Tobit/OLS/Heckman variable list.
gen z=lnpcincome*lnr
// M : within origin, sum of expshare_0*lnr.
// MM: within destination, sum of lnpcincome/N.
bysort code_o: egen M=sum(expshare_0*lnr)
bysort code_d: egen MM=sum((1/N)*lnpcincome)	
// Demeaned variants of z. They are saved with the data but are not referenced
// again in the visible part of this file.
gen zdm=lnr*(lnpcincome-MM)
gen zdmm=(lnr-M)*(lnpcincome-MM)
drop M MM

// labels
// Variable labels for the regressors. The label text contains LaTeX-style
// markup ($\times$, _{ex}, _{im}); presumably it is meant for exported
// regression tables (esttab is called with the -label- option in Table 1).
label var D "Internal"
label var t "Distance"
label var r "Tariff"
label var f "Entry cost"
label var tw "Distance $\times$ Income_{ex}"
label var rw "Tariff $\times$ Income_{ex}"
label var fw "Entry cost $\times$ Income_{ex}"
label var z "Income_{im} $\times$ Income_{ex}"
label var w "Income_{ex}"

// industry labels
// ind is all-missing in the master data, so the rows of industrylist.dta are
// expected to arrive as unmatched using-rows (_merge==2) appended to the data.
generate ind = .
merge m:1 ind using "industrylist.dta", keepusing(ind*)
// Sort the appended rows to the top, ordered by ind, so that row k+1 supplies
// the label text for industry k.
// NOTE(review): this presumes the list holds ind = 0,...,25 -- confirm.
gsort -_merge ind
forvalues k = 0/25 {
	local row = `k' + 1
	foreach stem in flow trade ya y tari {
		label variable `stem'_`k' `"`=industry[`row']'"'
	}
}
// Remove the helper rows and helper columns again before saving.
drop if _merge == 2
drop _merge ind*

save "_DataReg.dta", replace


//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/////////////////////            Part 0. Summary        ////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//------------------------------------------------------------------------------

//--------------------------------- Figure 1 -----------------------------------
////////////////////////////////////////////////////////////////////////////////
// frequency of zeros across industry
use "_DataReg.dta", clear

// International pairs only.
drop if D==1
// fre_i: share of the remaining pairs with trade_i==0 (a constant per industry).
forvalues i = 0/25 {
	egen fre`i'=mean(1-trade_`i')
}
// Keep one row of constants and transpose it: one row per industry, value in v1.
keep fre*
keep in 1
xpose, clear
// Row order follows fre0..fre25, so ind = 0 is the aggregate.
gen ind=_n-1
merge 1:m ind using "industrylist.dta", keepusing(industryname) keep(master match) nogen
label var v1 "Zero frequency"
gen v2=1-v1
// Collapse ind to a 0/1 flag (0 = aggregate, 1 = any sector) and sort the
// sectors by their zero frequency, aggregate first.
replace ind=1 if ind!=0
sort ind v1
// Insert an empty row after the aggregate; it gets a blank name and acts as a
// spacer bar in the chart.
insobs 1, after(1)
replace industryname=" " if industryname==""
// n fixes the bar order used by sort(n) below.
gen n=_n
// Mean zero frequency across sectors, in percent, drawn as the reference line.
sum v1 if ind==1
scalar m=100*r(mean)
// "industry" in over() relies on variable-name abbreviation for industryname,
// the only variable kept from industrylist.dta.
graph hbar v1 v2, over(industry, sort(n)) legend(lab(1 "No trade") ///
      lab(2 "Trade")) yline(`=scalar(m)') ytitle("Percent of country pairs") ///
	  bar(1, col(blue*1.75)) bar(2, col(yellow)) xsize(3) ysize(4) ///
	  graphregion(color(white)) stack percent label
////////////////////////////////////////////////////////////////////////////////

//------------------------- for Figure 2 use -----------------------------------
////////////////////////////////////////////////////////////////////////////////
// frequency of zeros across industry and country
use "_DataReg.dta", clear
// Rank origins and destinations by their GDP value and flip the ranking
// (76 - rank), so that larger GDP values receive smaller ids.
// NOTE(review): the constant 76 presumes 75 distinct GDP values -- confirm.
foreach side in o d {
	egen id_`side' = group(gdp_`side')
	replace id_`side' = 76 - id_`side'
}
// Export the trade indicators keyed by the two ids.
keep id_* trade_*
sort id_o id_d
export delimited using "ZERO.txt", replace
////////////////////////////////////////////////////////////////////////////////

//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//////////////////            Part 1. Estimation        ////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//------------------------------------------------------------------------------

//--------------------------------- Table 1 ------------------------------------
////////////////////////////////////////////////////////////////////////////////
use "_DataReg.dta", clear
set more off
set matsize 10000 
// Strip the _0 suffix so the aggregate-industry variables get bare names
// (ya_0 -> ya, trade_0 -> trade, ...). r_0 was already dropped when the data
// were built, so this does not collide with the existing variable r.
rename *_0 *
// Blank label for the dependent variable.
label var ya " " 

// baseline
// Regressors shared by the three estimators; each specification also includes
// destination-specific slopes on w plus origin and destination indicators.
local vlist = "t tw r rw f fw z D"
eststo clear 
// (1) Tobit; -ll- is given without a value.
eststo: tobit ya `vlist' i.code_d#c.w i.code_o i.code_d, ll
// (2) OLS on the same specification.
eststo: reg ya `vlist' i.code_d#c.w i.code_o i.code_d
// (3) Heckman two-step: zeros are first set to missing so that they count as
// unobserved outcomes; the selection equation omits the code_d#w interactions.
replace ya=. if ya==0
eststo: heckman ya `vlist' i.code_d#c.w i.code_o i.code_d, select(`vlist' i.code_o i.code_d) twostep
// Print the three columns side by side, hiding the indicator terms, the
// constant and the selection equation.
esttab, drop(*code* _cons select:) star(* 0.1 ** 0.05 *** 0.01) ///
       mtitle("Tobit" "OLS" "Heckit") b(3) se(3) r2 scalars(ll) label nonotes	    
////////////////////////////////////////////////////////////////////////////////	
//------------------------------------------------------------------------------	   


//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//////////////////            Part 2. Sectoral regs        /////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//------------------------------------------------------------------------------

use "_DataReg.dta", clear
set more off
set matsize 10000 

// Industry-by-industry Tobit regressions (0 = aggregate, 1-25 = sectors).
// For every industry i the loop leaves behind:
//   hat_ya_i             default -predict- result after the Tobit
//   Friction_i           hat_ya_i - incsharea_i where trade_i==0,
//                        ya_i     - incsharea_i where trade_i==1
//   twb_i tb_i           from b_tw, b_t   (distance)
//   rwb_i rb_i           from b_rw, b_r   (tariff)
//   fwb_i fb_i           from b_fw, b_f   (entry cost)
//   zb_i                 from b_z
//   beta_t_i beta_r_i    b_t / b_r divided by their within-destination sum
//   tP_i rP_i fP_i zP_i  the components P_t, P_r, P_f, P_z built below
eststo clear
local vlist = "t tw r rw f fw z D"
// r currently holds the aggregate tariff term and r_0 no longer exists;
// recreate r_0 so that "replace r=r_`i'" also works for i==0.
gen r_0=r
forvalues i = 0/25 {
	// tariff
	// Swap in the industry-specific tariff term and rebuild its interaction.
	replace r=r_`i'
	replace rw=r*w
	// reg
	eststo: tobit ya_`i' `vlist' i.code_d#c.w i.code_o i.code_d, ll
	predict hat_ya_`i'
	gen Friction_`i'=hat_ya_`i'-incsharea_`i' if trade_`i'==0
	replace Friction_`i'=ya_`i'-incsharea_`i' if trade_`i'==1
	// t
	// b_t is minus the slope on t implied by the t and t*w coefficients, so it
	// varies with w.
	gen b_tw=_b[tw]
	gen b_t=-(_b[tw]*w+_b[t])
	// Floor: rows with a negative b_t receive the smallest positive b_t.
	// NOTE(review): if no b_t is positive, `r(min)' appears to expand to nothing
	// and the replace below would not parse -- confirm this cannot happen.
	sum b_t if b_t>0
	replace b_t=`r(min)' if b_t<0 //*******************************
	// beta_t_i: b_t as a share of its total within the destination.
	bysort code_d: egen tmp=sum(b_t)
	gen beta_t_`i'=b_t/tmp
	// M : within origin, expshare-weighted sum of t.
	// MM: within destination, beta-weighted sum of (t-M).
	bysort code_o: egen M=sum(expshare_`i'*t)
	bysort code_d: egen MM=sum(beta_t_`i'*(t-M))
	gen P_t=-b_t*(t-M-MM)
	drop M MM tmp 
	// r
	// Same construction as for t, using the tariff term r.
	gen b_rw=_b[rw]
	gen b_r=-(_b[rw]*w+_b[r])
	sum b_r if b_r>0
	replace b_r=`r(min)' if b_r<0 //*******************************
	bysort code_d: egen tmp=sum(b_r)
	gen beta_r_`i'=b_r/tmp
	bysort code_o: egen M=sum(expshare_`i'*r)
	bysort code_d: egen MM=sum(beta_r_`i'*(r-M))
	gen P_r=-b_r*(r-M-MM)
	drop M MM tmp 	
	// f
	// Entry cost: same floor, but only the origin-level term M is removed.
	gen b_fw=_b[fw]
	gen b_f=-(_b[fw]*w+_b[f])
	sum b_f if b_f>0
	replace b_f=`r(min)' if b_f<0 //*******************************
	bysort code_o: egen M=sum(expshare_`i'*f)
	gen P_f=-b_f*(f-M)
	drop M 		
	// z
	// A negative coefficient on z is replaced by zero.
	gen b_z=_b[z]
	replace b_z=0 if b_z<0 //*******************************	
	bysort code_o: egen M=sum(expshare_`i'*lnr)
	bysort code_d: egen MM=sum((1/N)*lnpcincome)	
	gen P_z=b_z*(lnr-M)*(lnpcincome-MM)
	drop M MM
	// Move the prefix behind the stem and tag with the industry:
	// b_t -> tb_i, b_tw -> twb_i, ..., P_t -> tP_i, ...
	rename b_* *b_`i'
	rename P_* *P_`i'
}
	   
save "_DataReged.dta", replace

//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//////////////////       Part 3. Zero Decomposition      ///////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//------------------------------------------------------------------------------

//--------------------------------- Table 2 ------------------------------------
////////////////////////////////////////////////////////////////////////////////
use "_DataReged.dta", clear
set more off
set matsize 10000 

// Placeholder variables that are refilled for every industry, so that the
// equation and coefficient names are the same in each stored model.
generate Y = .
foreach comp in t r f z {
	generate X`comp' = .
}

// Cross-equation restrictions; they do not depend on the industry:
//   1: the four slopes on Y sum to one
//   2: the four constants sum to zero
constraint define 1 [Xt]Y+[Xr]Y+[Xf]Y+[Xz]Y=1
constraint define 2 [Xt]_cons+[Xr]_cons+[Xf]_cons+[Xz]_cons=0

eststo clear
forvalues k = 0/25 {
	// Load the friction term and the four components of industry k.
	replace Y = Friction_`k'
	foreach comp in t r f z {
		replace X`comp' = `comp'P_`k'
	}
	// Constrained system, estimated on the pairs without trade.
	reg3 (Xt Y) (Xr Y) (Xf Y) (Xz Y) if trade_`k'==0, constr(1 2)
	// Store the result under the industry label taken from ya_k.
	// NOTE(review): the label is used as a stored-estimate name, so it is
	// presumably a single valid Stata name -- confirm against industrylist.dta.
	local modelname : variable label ya_`k'
	eststo `modelname'
}
esttab using "Decomp.csv", b(3) se(2) star(* 0.1 ** 0.05 *** 0.01) ///
        drop(_cons) wide mtitle replace
// output
// Re-read the CSV written by esttab and reshape it into the layout of Table 2.
// All columns are handled as strings below (subinstr is applied to every one).
import delim using "Decomp.csv", stripq(yes) clear
// Drop the first row and the last two rows (presumably header/footer lines of
// the esttab output -- confirm against Decomp.csv).
drop in 1
drop in L
drop in L
// Remove "=" characters from every cell.
foreach v of varlist _all {
	replace `v'=subinstr(`v',"=","",.)
}
// Rows labelled "Y" take over the label of the row directly above them.
replace v1=v1[_n-1] if v1=="Y"
drop if v2==""
// sxpose is a user-written command (not part of official Stata); it
// transposes the string data. The first transposed row is then dropped.
sxpose, force clear
drop in 1
//
rename (_var1 _var6) (name obs)
// v: sort key copied from _var2. "Aggregate" gets "_" so that it comes first
// in the descending sort below.
// NOTE(review): v is a string, so gsort -v is a string sort, not a numeric one.
gen v=_var2 // 2 3
replace v="_" if name=="Aggregate"
// g flags rows without a name; i numbers consecutive pairs of rows (1,1,2,2,...).
gen g=(name=="")
gen i=_n/2
replace i=i+0.5 if mod(i,1)>0
gsort -v
// s: rank of the named rows after sorting; id is that rank in parentheses.
gen s=_n if g==0
gen id="("+string(s)+")" if g==0
// Restore the pairing, pass the rank on to the unnamed row of each pair, and
// order the table by rank.
sort i g
by i: replace s=s[_n-1] if s==. 
sort s g
order id
drop g i s v
// Blank spacer row after the first two rows.
insobs 1, after(2)
// Append a blank row, a "Mean" row and a "St. d." row.
insobs 2, after(_N)
replace name="Mean" in L
insobs 1, after(_N)
replace name="St. d." in L
// For every remaining _var* column: build a numeric copy (stars removed, only
// rows that carry an id), then write its mean and standard deviation over all
// rows except "Aggregate" into the two summary rows.
foreach i of varlist _var* {
	gen v=`i' 
	replace v=subinstr(v,"*","",.)
	replace v="" if id==""
	destring v, replace
	egen m=mean(v) if name!="Aggregate"
	egen s=sd(v) if name!="Aggregate"
	replace `i'=string(round(m,0.001)) if name=="Mean"
	replace `i'=string(round(s,0.001)) in L
	rename v v`i'
	drop m s
}
// Row 3 is the spacer row inserted above; give it blank text.
replace name=" " in 3
replace id=" " in 3
// n is created and dropped again right away, together with the numeric
// helper copies v_var*.
gen n=_n
//
drop v_* n
* the data itself is exactly Table 2.

//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//////////////////            Part 4. Counterfactuals        ///////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//------------------------------------------------------------------------------

///////////////////////////////  bilateral cost cut ////////////////////////////

use "_DataReged.dta", clear
set more off
// International pairs only; the aggregate industry (suffix _0) is not used.
drop if D==1
drop *_0

// Three scenarios, distinguished by the factor applied to the cost term
// (original section comments: "0.1", "0.5", "all"):
//   scenario 1 -> factor 0.1   -> Dy1t_i, Dy1r_i, Dy1f_i
//   scenario 2 -> factor 0.5   -> Dy2t_i, Dy2r_i, Dy2f_i
//   scenario 3 -> factor 1     -> Dy3t_i, Dy3r_i, Dy3f_i
// Each Dy variable is defined only for pairs without trade (trade_i==0) and is
// 1 when the adjusted prediction is positive, 0 otherwise.
local scen = 0
foreach cut in 0.1 0.5 1 {
	local ++scen
	forvalues i = 1/25 {
		// Temporarily strip the industry index so the formulas below can be
		// written once: x_i -> x_
		rename *_`i' *_
		// Baseline index: income share scaled by EM plus the four components.
		gen dyy = incshare_/EM+tP_+rP_+fP_+zP_
		// Add the scenario's share of the distance / tariff / entry-cost term.
		gen hatt = dyy+tb_*t*`cut'
		gen hatr = dyy+rb_*r_*`cut'
		gen hatf = dyy+fb_*f*`cut'
		// Stata treats missing as larger than any number, so (hat>0) alone
		// would code a pair with a missing prediction as "starts trading".
		// Those pairs are left missing instead.
		gen dyt = (hatt>0) if trade_==0 & !missing(hatt)
		gen dyr = (hatr>0) if trade_==0 & !missing(hatr)
		gen dyf = (hatf>0) if trade_==0 & !missing(hatf)
		drop hatt hatr hatf dyy
		// dyt -> Dy<scenario>t_i, etc.; then restore the industry index.
		rename dy* Dy`scen'*_`i'
		rename *_ *_`i'
	}
}
save "counter1.dta", replace

//--------------------------------- Table 3 ------------------------------------
////////////////////////////////////////////////////////////////////////////////
// output: counter1
use "counter1.dta", clear
// Step 1: per origin, the sum of each trade indicator and the mean of each Dy
// variable. Step 2: the mean of those values across origins (one row).
collapse (sum)trade_* (mean)Dy*, by(iso_o)
collapse (mean)trade_* (mean)Dy*
// Transpose to one row per original variable; _varname keeps the old name,
// which is split at "_" into a stem (_varname1) and an industry (_varname2).
xpose, varname clear
split _varname, parse("_")
drop _varname
destring, replace
// One row per industry, one column per stem: v1trade, v1Dy1t, v1Dy2t, ...
reshape wide v*, i(_varname2) j(_varname1) string
// v1Dy<k><x> -> v<k><x>  (e.g. v1Dy3t -> v3t); v1trade keeps its name.
rename (*1Dy* _varname2) (** ind)
// NOTE(review): 75 is hard-coded; it presumably reflects the number of
// countries -- confirm.
foreach v of varlist *trade {
	replace `v'=75-`v'
}
// Attach the industry names and order the rows by v3t, largest first.
merge 1:1 ind using "industrylist.dta", keepusing(industry) keep(match) nogen
drop ind
gsort -v3t
gen id="("+string(_n)+")"
order id industry *trade *t *r *f
// Append a blank row, a "Mean" row and a "St. d." row.
insobs 2, after(_N)
replace industry="Mean" in L
insobs 1, after(_N)
replace industry="St. d." in L
// Fill the two summary rows column by column. The appended rows are still
// missing when each column's mean and sd are computed.
foreach i of varlist v* {
	gen v=`i'
	egen m=mean(v)
	egen s=sd(v)
	replace `i'=m if industry=="Mean"
	replace `i'=s in L
	drop m s v
}

format *trade %9.0f
format *t *r *f %9.2f
// The trade column is not part of the final table.
drop *trade
* the data itself is exactly Table 3.
////////////////////////////////////////////////////////////////////////////////	

//------------------------- for Figure 4 use -----------------------------------
////////////////////////////////////////////////////////////////////////////////	
use "counter1.dta", clear
// Work with industry 6 only: x_6 -> x_
rename *_6 *_ // leather industry
// check : pairs without trade -- 0 if the Tobit prediction is negative,
//         1 if it is positive.
// check1: the same coding for pairs with trade.
// Stata treats missing as larger than any number, so the ">0" tests exclude
// missing predictions explicitly; such pairs keep a missing flag instead of
// being coded as "predicted positive".
gen check=.
replace check=0 if trade_==0 & hat_ya_<0
replace check=1 if trade_==0 & hat_ya_>0 & !missing(hat_ya_)
gen check1=.
replace check1=0 if trade_==1 & hat_ya_<0
replace check1=1 if trade_==1 & hat_ya_>0 & !missing(hat_ya_)
// Ids from the GDP values of origin and destination.
egen id_o=group(gdp_o)
egen id_d=group(gdp_d)
// counter1.dta has no domestic pairs; fillin adds the missing id_o x id_d
// combinations, and the diagonal is flagged check1=1.
fillin id_o id_d // add internal obs
replace check1=1 if id_o==id_d
// Reverse the ranking so that larger GDP values receive smaller ids.
// NOTE(review): the constant 76 presumes 75 distinct GDP values -- confirm.
replace id_o=76-id_o
replace id_d=76-id_d
keep id_* check*
sort id_o id_d
export delimited using "ZERO_fit.txt", replace
////////////////////////////////////////////////////////////////////////////////	

//--------------------------------- Table 4 ------------------------------------
////////////////////////////////////////////////////////////////////////////////
// output: counter1 income group
use "counter1.dta", clear
// Keep only the first scenario (Dy1*) and drop the scenario digit from names.
rename (Dy1t_* Dy1r_* Dy1f_*) (Dyt_* Dyr_* Dyf_*)
drop Dy2* Dy3*
// Income group of the origin: 1 = Poor, 2 = Rich (w above 8.64).
// NOTE(review): a missing w also satisfies w>8.64 and would be classed as
// Rich -- confirm w is never missing here.
gen g=1
replace g=2 if w>8.64
label define gl 1 "Poor" 2 "Rich"
label values g gl
tab g
// Per origin: sum of trade indicators, mean of Dy, and the origin's group;
// then the mean across origins within each group (two rows).
collapse (sum)trade_* (mean)Dy* (first)g, by(iso_o)
collapse (mean)trade_* (mean)Dy*, by(g)
// Transpose: columns v1, v2 hold the two groups; the first row is the
// transposed g itself and is dropped.
xpose, varname clear
drop in 1
split _varname, parse("_")
drop _varname
destring, replace
reshape wide v*, i(_varname2) j(_varname1) string
// v<g>Dy<x> -> v<g><x>  (e.g. v1Dyt -> v1t)
rename (*Dy* _varname2) (** ind)
// NOTE(review): 75 is hard-coded. Domestic pairs were dropped before
// counter1.dta was saved, so confirm that 75 minus the count is the intended
// figure.
foreach v of varlist *trade {
	replace `v'=75-`v'
}
// Attach industry names and order rows by the first group's trade column.
merge 1:1 ind using "industrylist.dta", keepusing(industry) keep(match) nogen
drop ind
sort v1trade
gen id="("+string(_n)+")"
order id industry *trade *t *r *f
// Append a blank row, a "Mean" row and a "St. d." row, then fill them
// column by column.
insobs 2, after(_N)
replace industry="Mean" in L
insobs 1, after(_N)
replace industry="St. d." in L
foreach i of varlist v* {
	gen v=`i'
	egen m=mean(v)
	egen s=sd(v)
	replace `i'=m if industry=="Mean"
	replace `i'=s in L
	drop m s v
}
format *trade %9.0f
format *t *r *f %9.2f

// Final column order: all group-1 columns, then all group-2 columns.
order id industry v1trade v1t v1r v1f v2trade v2t v2r v2f
* the data itself is exactly Table 4.
////////////////////////////////////////////////////////////////////////////////	
//------------------------------------------------------------------------------

///////////////////////////////  bilateral subsidy /////////////////////////////

use "_DataReged.dta", clear
// International pairs only; the aggregate industry (suffix _0) is not used.
drop if D==1
drop *_0
// For every sector, compute the ratios deltat_i, deltar_i and deltaf_i: minus
// the baseline index divided by the distance, tariff and entry-cost term.
// They are defined only for pairs without trade whose baseline index is not
// positive.
forvalues k = 1/25 {
	// x_k -> x_ so the formulas can be written without the index
	rename *_`k' *_
	tempvar gap
	generate `gap' = incshare_/EM + tP_ + rP_ + fP_ + zP_
	replace `gap' = . if `gap' > 0 | trade_ == 1
	generate deltat = -`gap'/(tb_*t)
	generate deltar = -`gap'/(rb_*r_)
	generate deltaf = -`gap'/(fb_*f)
	drop `gap'
	// delta? -> delta?_k, then restore the index on everything else
	rename delta? delta?_`k'
	rename *_ *_`k'
}

//--------------------------------- Table 5 ------------------------------------
////////////////////////////////////////////////////////////////////////////////
// output by group
// Operates on the data left in memory by the subsidy loop above (no reload).
// Rename delta?_i to Dy?_i so the same reshaping steps as in the other tables
// apply.
rename delta* Dy*
// Income group of the origin: 1 = Poor, 2 = Rich (w above 8.64).
// NOTE(review): a missing w also satisfies w>8.64 and would be classed as
// Rich -- confirm w is never missing here.
gen g=1
replace g=2 if w>8.64 // developed world
label define gl 1 "Poor" 2 "Rich"
label values g gl
tab g
// Discard large values before averaging: above 10 for the distance ratio,
// above 100 for the tariff and entry-cost ratios.
forvalues i = 1/25 {
	replace Dyt_`i'=. if Dyt_`i'>10
	replace Dyr_`i'=. if Dyr_`i'>100
	replace Dyf_`i'=. if Dyf_`i'>100
}	
// Per origin: sum of trade indicators, mean of Dy, and the origin's group;
// then the mean across origins within each group (two rows).
collapse (sum)trade_* (mean)Dy* (first)g, by(iso_o)
collapse (mean)trade_* (mean)Dy*, by(g)
// Transpose: columns v1, v2 hold the two groups; the first row is the
// transposed g itself and is dropped.
xpose, varname clear
drop in 1
split _varname, parse("_")
drop _varname
destring, replace
reshape wide v*, i(_varname2) j(_varname1) string
// v<g>Dy<x> -> v<g><x>  (e.g. v1Dyt -> v1t)
rename (*Dy* _varname2) (** ind)
// NOTE(review): 75 is hard-coded; domestic pairs were dropped above, so
// confirm that 75 minus the count is the intended figure.
foreach v of varlist *trade {
	replace `v'=75-`v'
}
// Attach industry names and order rows by the first group's trade column.
merge 1:1 ind using "industrylist.dta", keepusing(industry) keep(match) nogen
drop ind
sort v1trade
gen id="("+string(_n)+")"
order id industry *trade *t *r *f
// Append a blank row, a "Mean" row and a "St. d." row, then fill them
// column by column.
insobs 2, after(_N)
replace industry="Mean" in L
insobs 1, after(_N)
replace industry="St. d." in L
foreach i of varlist v* {
	gen v=`i'
	egen m=mean(v)
	egen s=sd(v)
	replace `i'=m if industry=="Mean"
	replace `i'=s in L
	drop m s v
}
//
format *trade %9.0f
format *t *r *f %9.2f
// Move the group-2 columns behind the group-1 columns.
order v2*, last
//
* the data itself is exactly Table 5.
////////////////////////////////////////////////////////////////////////////////

	   